Welcome.

First let’s set up an environment from which to run our code today. Open a terminal window and run the following:

## load conda
module purge
module load Anaconda3/2022.10
eval "$(conda shell.bash hook)"

Now create an environment using conda create:

conda create -p /data/projects/punim1445/python/YOUR_USERNAME_HERE mamba python=3.12

And install the packages we’ll need for this workshop using mamba:

## activate
conda activate /data/projects/punim1445/python/YOUR_USERNAME_HERE

## install
mamba install r-base r-tidyverse r-seurat r-dbscan scanpy

## also need r-devtools
mamba install r-devtools

Finally, make a personal subdirectory under the ST workshop folder in punim1445.

mkdir /data/projects/punim1445/ST_workshop/YOUR_USERNAME_HERE

We need to install a few more things but this will do for the moment.

Next, start up an open on demand session with your new environment.

Choose:

Rstudio with conda

Conda environment to load: /data/projects/punim1445/python/YOUR_USERNAME_HERE

R libs user directory: /data/projects/punim1445/python/YOUR_USERNAME_HERE/lib/R/library

And 8 hours so you have a session going for the whole day.

With the session up and running, let’s test everything works.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## 
## The following objects are masked from 'package:base':
## 
##     intersect, t
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(reticulate)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
use_condaenv('/data/projects/punim1236/python/YOUR_USERNAME_HERE') ## reticulate has to be told to do this even though it's literally installed inside the conda environment.

## for you guys it will be punim1445, but I did this in 1236 yesterday

We still need a few more packages to get through this workshop.

First is ‘spacexr’ which contains the RCTD algorithm. You’ll need to install it from github.

options(download.file.method="wget")
devtools::install_github('https://github.com/dmcable/spacexr')

Now load spacexr.

library(spacexr)

And the last thing you’ll need for now, tidy-seurat. Note that you might need to perform both of these steps from the terminal because of connectivity issues with R in open on demand. Worst case scenario you can just use my environment.

install.packages('BiocManager')
BiocManager::install("tidyomics")

Now load tidyomics.

library(tidyomics)
## ── Attaching core tidyomics packages ─────────────────────── tidyomics 0.99.7 ──
## ✔ nullranges               1.8.0      ✔ tidyseurat               0.8.0 
## ✔ plyranges                1.22.0     ✔ tidySingleCellExperiment 1.12.0
## ✔ tidybulk                 1.14.3     ✔ tidySummarizedExperiment 1.12.0
## ── Conflicts ────────────────────────────────────────── tidyomics_conflicts() ──
## ✖ plyranges::between()     masks dplyr::between()
## ✖ tidybulk::bind_cols()    masks ttservice::bind_cols(), dplyr::bind_cols()
## ✖ plyranges::filter()      masks tidybulk::filter(), dplyr::filter(), stats::filter()
## ✖ S4Vectors::findMatches() masks utils::findMatches()
## ✖ S4Vectors::head()        masks utils::head()
## ✖ plyranges::n()           masks dplyr::n()
## ✖ plyranges::n_distinct()  masks dplyr::n_distinct()
## ✖ GenomicRanges::reduce()  masks IRanges::reduce(), purrr::reduce()
## ✖ IRanges::relist()        masks BiocGenerics::relist(), utils::relist()
## ✖ tidybulk::rename()       masks S4Vectors::rename(), dplyr::rename()
## ✖ plyranges::slice()       masks IRanges::slice(), dplyr::slice()
## ✖ IRanges::stack()         masks S4Vectors::stack(), utils::stack()
## ✖ S4Vectors::tail()        masks utils::tail()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyseurat)

Now we’re ready to get to work on some ST data.

First, we’ll load some spleen ST data generated by Ollie. The file we’re loading is direct output from curio’s preprocessing pipeline.

Using tidyomics, we can also chain together some preprocessing commands, but we’ll do that after getting some useful metrics on capture rate.

sptheme <- list(
  coord_fixed(),
  theme(axis.ticks = element_blank(),
        axis.text = element_blank()),
  scale_colour_viridis_c(option = 'magma', direction=-1, begin=0.1, end=0.95)
)

slice <- readRDS('/data/gpfs/projects/punim1445/ST_workshop/example_curio.rds')
DefaultAssay(slice) <- 'RNA'

median(slice$nCount_RNA)
median(slice$nFeature_RNA)

## and we can plot the spatial distribution of mRNA
slice %>% FeaturePlot(reduction='SPATIAL', features='nCount_RNA', order=T) + sptheme
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

## and the distribution of counts
plot(slice$nCount_RNA, slice$nFeature_RNA)

## [1] 131
## [1] 106

Anything with less than 100 UMIs may be rejected by RCTD but it might still be informative. For scRNA-seq data we typically apply a threshold of 200 genes captured to justify keeping a spot but as you can see below, using this threshold for spatial means dumping a lot of data.

slice %>% subset(nFeature_RNA > 200) %>% FeaturePlot(reduction='SPATIAL', features='nFeature_RNA', order=T) + sptheme
## Warning: Adding image data that isn't associated with any assays
## Warning: Not validating Seurat objects
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We’ll go with a gentler threshold and keep all spots with >50 UMIs.

slice %>% subset(nCount_RNA > 50) ->
  slice
## Warning: Adding image data that isn't associated with any assays
## Warning: Not validating Seurat objects
slice %>% FeaturePlot(reduction='SPATIAL', features='nCount_RNA', order=T) + sptheme
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We can also use dbscan to remove spots in outlying areas. We do this by clustering the data and identifying clusters that represent spots disconnected from the bulk of the tissue, presumably on areas of the array that didn’t receive any tissue.

dbout <- dbscan(x = slice[['SPATIAL']]@cell.embeddings, eps = 50, minPts = 10)

slice[['SPATIAL']]@cell.embeddings %>% as.data.frame() %>% mutate(cluster = as.factor(dbout$cluster)) %>%
  ggplot(mapping = aes(x=SPATIAL_1, y=SPATIAL_2, colour=cluster)) + geom_point() + theme_classic() + coord_fixed()

We can see that cluster 1 contains the bulk of the array. Accordingly, we can remove all clusters but 1.

slice %>% mutate(cluster = as.factor(dbout$cluster)) %>% subset(cluster == '1') ->
  slice
## Warning: Adding image data that isn't associated with any assays
## Warning: Not validating Seurat objects
slice %>% FeaturePlot(reduction='SPATIAL', features = 'nCount_RNA') + sptheme
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Let’s finish QC by normalising and scaling the data, then running PCA on highly variable genes.

slice %>%
  NormalizeData() %>%
  FindVariableFeatures(nfeature=3000) %>%
  ScaleData() %>%
  RunPCA() ->
  slice
## Centering and scaling data matrix
## PC_ 1 
## Positive:  Gm2682, Lyz2, Ccl5, Themis, mt-Nd6, Cr2, Inpp4b, Ebf1, mt-Nd5, Fcrl5 
##     Ttn, Camk2b, Gprin3, Cd96, Hivep2, Tasor2, Shc3, Adamts19, Rtn1, Gm47730 
##     Gm11884, Gm47712, Clip4, Hmgcll1, Shroom4, Cdk15, Myl3, Grik1, Cxcl13, Plb1 
## Negative:  Hba-a1, Hba-a2, Hbb-bt, Hbb-bs, Car2, Trim71, Gpx1, Slc4a1, Snca, Mkrn1 
##     Gypa, Rsad2, Mki67, Tmcc2, Bpgm, Cd24a, Xpo7, Ube2l6, Spta1, Tent5c 
##     Ermap, Specc1, Hmbs, Hemgn, Trim10, Trak2, Sptb, Apol11b, Tfdp2, Urod 
## PC_ 2 
## Positive:  Hbb-bt, Hba-a2, Hbb-bs, Hba-a1, Rsad2, Snca, Trim71, Car2, Tmcc2, Gypa 
##     Slc4a1, Spta1, Sox6, Specc1, Apol11b, Hemgn, Ermap, Trim10, Mkrn1, Slc25a21 
##     Ube2l6, Xpo7, Sptb, Bpgm, Tent5c, Slfn14, Nfe2, Tfdp2, March8, S100a9 
## Negative:  Igkc, Ighm, Lyz2, Iglc2, mt-Nd6, Pou2af1, Jchain, Cr2, Ighg2b, Ebf1 
##     Hsp90b1, St6gal1, mt-Nd5, Camk2b, Txndc5, Mfge8, Ighg2c, Inpp4b, Cxcl13, Bank1 
##     Ighg3, Rbbp7, 2010309G21Rik, Trp53inp1, Mbd2, Lpp, Klhl6, Creld2, Kcnq5, Uvrag 
## PC_ 3 
## Positive:  Jchain, Igkc, Txndc5, Slpi, Hsp90b1, Ighm, Creld2, 2010309G21Rik, Ighg2c, Ighg2b 
##     Ighg1, Ighg3, Igha, Trp53inp1, Igkv19-93, Igkv1-117, Igkv5-39, Igkv10-96, Fam214a, Igkv6-23 
##     Igkv8-24, Igkv4-55, Igkv8-30, Hbb-bs, Igkv12-46, Iglc2, Hba-a1, Hbb-bt, Igkv10-94, Hba-a2 
## Negative:  Cr2, Ebf1, Gm10800, mt-Nd6, Bank1, mt-Nd5, Gm10801, Cxcl13, Mfge8, Lyz2 
##     Cd24a, Inpp4b, Kcnq5, Maml2, Rbbp7, Tmem163, Fchsd2, Lpp, Mbd2, Pou2af1 
##     Csmd1, Hivep2, Klhl6, Fcrl5, Arhgap26, Ybx3, Myc, Enpp2, Uvrag, Pdzd2 
## PC_ 4 
## Positive:  Mfge8, mt-Nd6, Lyz2, mt-Nd5, Ccl5, Gclc, Klhl6, Gpx1, Cxcl13, Mbd2 
##     Hmces, Anxa2, Cd24a, Nsd2, Myc, Gm2682, Ybx3, Supt16, Cr2, Elovl5 
##     Dhfr, Rbbp7, Mkrn1, Bank1, Uvrag, Tfdp2, Gtf2b, Spta1, Atp2c1, St6gal1 
## Negative:  Gm10801, Gm10800, Jchain, Ighg1, Ighg2b, Creld2, Ighg2c, 2010309G21Rik, Hsp90b1, Diaph3 
##     Txndc5, Slpi, Csmd1, S100a9, Atxn1, Igkc, Ighg3, Slc25a21, Ighm, Tmcc2 
##     Igkv19-93, Hmbs, Hba-a2, Sox6, Hba-a1, Igha, Robo2, Mctp1, Wwox, Igkv5-39 
## PC_ 5 
## Positive:  Hbb-bs, Hba-a2, Hbb-bt, Hba-a1, Cr2, Bank1, Cxcl13, Ebf1, Bpgm, Iglc2 
##     Rsad2, Mkrn1, Fcrl5, Snca, Tent5c, Mfge8, Ighm, Xpo7, Tmcc2, Trim71 
##     Fchsd2, Inpp4b, Kcnq5, Hivep2, Tmem163, Zfp608, Arhgap26, Enpp2, Atxn1, Gpx1 
## Negative:  Mki67, Cenpe, Cenpf, Ermap, Hemgn, Sptb, Sox6, Diaph3, Gclc, Urod 
##     Ccl5, Cd24a, Gm2682, Slc16a10, Slc25a21, March8, Mbd2, Hmbs, Hmces, Gypa 
##     Spta1, mt-Nd6, Ybx3, Dhfr, Car2, Nfe2, Car1, mt-Nd5, Slc4a1, Anxa2

Check dimensionality.

ElbowPlot(slice)

Then we can run unbiased clustering on the data to determine what structures we can see in the data.

slice %>%
  FindNeighbors(dims = 1:10) %>%
  FindClusters(resolution = 0.5) ->
  slice
## Computing nearest neighbor graph
## Computing SNN
dimtheme <- list(
  coord_fixed(),
  theme(axis.ticks = element_blank(),
        axis.text = element_blank())
)

slice %>% DimPlot(reduction = 'SPATIAL') + dimtheme ->
  clusters
print(clusters)

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 50443
## Number of edges: 1396369
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8921
## Number of communities: 20
## Elapsed time: 18 seconds

I was reminded at this point of writing the code book that we need the Wes Anderson colour scheme package.

install_github('https://github.com/karthik/wesanderson')

We should compare the above clustering to gene expression to determine whether it is meaningful.

library(wesanderson)

plot_grid(
  clusters,
  slice %>% FeaturePlot(reduction='SPATIAL', features='Ighd', order=T) + sptheme,
  nrow=1, ncol=2
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plot_grid(
  clusters,
  slice %>% FeaturePlot(reduction='SPATIAL', features='Tfrc', order=T) + sptheme,
  nrow=1, ncol=2
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plot_grid(
  clusters,
  slice %>% FeaturePlot(reduction='SPATIAL', features='Cxcl13', order=T) + sptheme,
  nrow=1, ncol=2
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plot_grid(
  clusters,
  slice %>% FeaturePlot(reduction='SPATIAL', features='Cd3e', order=T) + sptheme,
  nrow=1, ncol=2
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plot_grid(
  clusters,
  slice %>% FeaturePlot(reduction='SPATIAL', features='Hba-a1', order=T) + sptheme,
  nrow=1, ncol=2
)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

The clustering method shown above doesn’t use spatial data but we can run a method I wrote that smooths PC embeddings across space so that spots share gene expression data. Let’s run it below. However, we first have to convert the units of the spatial coordinates from pixels to microns (scale is 0.647 microns to 1 pixel).

source('/data/gpfs/projects/punim1445/ST_workshop/lna_source.R')

## convert pixels to microns
slice[['SPATIAL']]@cell.embeddings <- slice[['SPATIAL']]@cell.embeddings * 0.647

## run LNA
slice <- RunLNA(object = slice, radius = 50, spatial = 'SPATIAL', dims = 1:10, target = 'pca')
## 10000 spots processed
## 20000 spots processed
## 30000 spots processed
## 40000 spots processed
## 50000 spots processed

Now we can run clustering again. I set the resolution lower here because I ended up with heaps of clusters for some reason.

slice %>%
  FindNeighbors(dims = 1:10, reduction = 'lna') %>%
  FindClusters(resolution = 0.3) ->
  slice
## Computing nearest neighbor graph
## Computing SNN
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 50443
## Number of edges: 1362451
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9098
## Number of communities: 8
## Elapsed time: 9 seconds
cols <- c(wes_palettes$Darjeeling1, wes_palettes$GrandBudapest1)

slice %>% DimPlot(reduction = 'SPATIAL', cols = cols) + dimtheme ->
  clusters
print(clusters)

We can also assign some labels.

slice %>% RenameIdents(
  "0" = "RP",
  "1" = "RP",
  "2" = "MZ",
  "3" = "Follicle",
  "4" = "T cell zone",
  "5" = "Follicle",
  "6" = "RP",
  "7" = "Capsule",
  "8" = "Capsule"
) -> slice
## Warning: Cannot find identity 8
slice %>% DimPlot(reduction = 'SPATIAL', cols = cols) + dimtheme ->
  clusters
print(clusters)

Now it’s time to run deconvolution. We’ll load a reference dataset from which to create cell type profiles. To expedite our analysis I’m going to subset to a small area of the tissue.

slice %>% DimPlot(reduction = 'SPATIAL', cols = cols) + dimtheme +
  geom_hline(yintercept = -1800) + geom_hline(yintercept = -800) + 
  geom_vline(xintercept = -3000) + geom_vline(xintercept = -2000) ->
  clusters
print(clusters)

Let’s subset to this square.

subslice <- slice %>%
  subset(
    SPATIAL_1 > -3000 & SPATIAL_1 < -2000 & SPATIAL_2 > -1800 & SPATIAL_2 < -800
  )
## Warning: Adding image data that isn't associated with any assays
## Warning: Not validating Seurat objects
subslice %>% DimPlot(reduction = 'SPATIAL', cols = cols) + dimtheme ->
  clusters
print(clusters)

Now we have a suitably small dataset on which to run deconvolution, we can have a go at running RCTD. Let’s download our reference dataset.

ref <- readRDS("/data/gpfs/projects/punim1445/ST_workshop/day0_reference_scrnaseq.rds")

## print list of cell types and counts
ref %>% select(cell_type) %>% table()
## tidyseurat says: Key columns are missing. A data frame is returned for independent data analysis.
## cell_type
##          ARC        B ASC         B B1 B follicular         B MZ    CD4+ iNKT 
##         1161           27          190         4654          767           90 
## CD4+ T naive    CD4+ Treg CD8+ T naive         cDC1         cDC2  Erythrocyte 
##         1253          168         1293           66          161          571 
##          FDC          FRC  Granulocyte   Macrophage     Monocyte      NK cell 
##          632         1967           89           31          239          268 
##          pDC        RP FB 
##           41         2570

We first have to convert it to RCTD’s reference scRNA-seq class.

library(spacexr)

## get cell types in factor format
cell_types <- as.factor(ref@meta.data$cell_type)
names(cell_types) <- rownames(ref@meta.data)

ref <- Reference(
  counts = ref@assays$RNA@counts,
  cell_types = cell_types
)

Then we have to convert the subslice object to RCTD’s spatial object class.

## get data for converting puck to SpatialRNA
counts <- subslice@assays$RNA@counts
coords <- as.data.frame(Embeddings(subslice,reduction="SPATIAL"))
colnames(coords) <- c("x","y")

## get UMI counts column for platform adjustment
nUMI <- as.numeric(subslice@meta.data$nCount_RNA)
names(nUMI) <- colnames(counts)
  
## convert puck to SpatialRNA format for RCTD
subslice_spatialRNA <- new(
  "SpatialRNA",
  counts = counts,
  coords = coords,
  nUMI = nUMI)

Now we can run RCTD.

## create RCTD object
rctd.obj <- create.RCTD(
  subslice_spatialRNA, ref,
  max_cores = 1)
## Begin: process_cell_type_info
## process_cell_type_info: number of cells in reference: 16238
## process_cell_type_info: number of genes in reference: 32864
## 
##          ARC        B ASC         B B1 B follicular         B MZ    CD4+ iNKT 
##         1161           27          190         4654          767           90 
## CD4+ T naive    CD4+ Treg CD8+ T naive         cDC1         cDC2  Erythrocyte 
##         1253          168         1293           66          161          571 
##          FDC          FRC  Granulocyte   Macrophage     Monocyte      NK cell 
##          632         1967           89           31          239          268 
##          pDC        RP FB 
##           41         2570
## Warning in asMethod(object): sparse->dense coercion: allocating vector of size
## 1.1 GiB
## End: process_cell_type_info
## create.RCTD: getting regression differentially expressed genes:
## get_de_genes: ARC found DE genes: 426
## get_de_genes: B ASC found DE genes: 157
## get_de_genes: B B1 found DE genes: 75
## get_de_genes: B follicular found DE genes: 102
## get_de_genes: B MZ found DE genes: 63
## get_de_genes: CD4+ iNKT found DE genes: 97
## get_de_genes: CD4+ T naive found DE genes: 97
## get_de_genes: CD4+ Treg found DE genes: 81
## get_de_genes: CD8+ T naive found DE genes: 103
## get_de_genes: cDC1 found DE genes: 93
## get_de_genes: cDC2 found DE genes: 109
## get_de_genes: Erythrocyte found DE genes: 152
## get_de_genes: FDC found DE genes: 459
## get_de_genes: FRC found DE genes: 384
## get_de_genes: Granulocyte found DE genes: 361
## get_de_genes: Macrophage found DE genes: 268
## get_de_genes: Monocyte found DE genes: 248
## get_de_genes: NK cell found DE genes: 143
## get_de_genes: pDC found DE genes: 144
## get_de_genes: RP FB found DE genes: 458
## get_de_genes: total DE genes: 2298
## create.RCTD: getting platform effect normalization differentially expressed genes:
## get_de_genes: ARC found DE genes: 793
## get_de_genes: B ASC found DE genes: 321
## get_de_genes: B B1 found DE genes: 213
## get_de_genes: B follicular found DE genes: 240
## get_de_genes: B MZ found DE genes: 230
## get_de_genes: CD4+ iNKT found DE genes: 292
## get_de_genes: CD4+ T naive found DE genes: 228
## get_de_genes: CD4+ Treg found DE genes: 280
## get_de_genes: CD8+ T naive found DE genes: 232
## get_de_genes: cDC1 found DE genes: 258
## get_de_genes: cDC2 found DE genes: 256
## get_de_genes: Erythrocyte found DE genes: 229
## get_de_genes: FDC found DE genes: 982
## get_de_genes: FRC found DE genes: 807
## get_de_genes: Granulocyte found DE genes: 621
## get_de_genes: Macrophage found DE genes: 602
## get_de_genes: Monocyte found DE genes: 467
## get_de_genes: NK cell found DE genes: 367
## get_de_genes: pDC found DE genes: 328
## get_de_genes: RP FB found DE genes: 1048
## get_de_genes: total DE genes: 3884
## run.RCTD - this is the step that runs RCTD, takes around 1 day for a typical puck of 30-40k spots.
#rctd.obj <- run.RCTD(rctd.obj)

Just kidding. I tried it last night and it took way too long.

Let’s load some RCTD output of Slide-seqV2 on naive mouse spleen.

rctd.obj <- readRDS("/data/gpfs/projects/punim1445/ST_workshop/example_rctd.rds")
rctd.obj <- rctd.obj@RCTD.reps[[4]]

Next we have to analyse the RCTD data. There are several outputs. The first is a spot classification. Spots may be annotated as one of singlet, doublet (certain) - i.e. two known cell types, doublet (uncertain) - i.e. one known and one unknown cell type, or reject (too few UMIs or unclear cell types present). I don’t typically use this output much but it can be helpful in understanding the extent to which multiple cells are captured in a single spot via array-based ST.

Note that doublets composed of the same cell type are impossible to detect so singlets are overcounted.

Let’s extract the data and plot a pie chart of it.

library(RColorBrewer)
cols = brewer.pal(n=4,name="Spectral")[c(1,3,4,2)]

plotdata <- rctd.obj@results$results_df$spot_class %>% 
  table() %>% as.data.frame() %>% set_names(nm = c('Class','Freq')) %>%
  arrange(desc(Class)) %>%
  mutate(prop = Freq / sum(Freq) * 100) %>%
  mutate(ypos = cumsum(prop) - 0.5 * prop)

## plot
pie <- ggplot(
  data = plotdata,
  aes(x = 2, y = prop, fill = Class)) +
  geom_bar(stat = "identity") + coord_polar("y", start = 0) +
  geom_text(aes(y = ypos, label = Freq)) +
  theme_void() + xlab(label=NULL) + ylab(label=NULL) +
  scale_fill_manual(values = cols) +
  xlim(0.75,2.5) +
  theme(strip.text = element_text(size=15))
print(pie)

Next is another classification assessment in which RCTD will assign each spot two cell type labels reflecting the highest and second highest-contributing cell types at each spot. For example, if a spot is composed of 60% monocyte and 40% B cell the cell type 1 annotation will be monocyte and cell type 2 will be B cell. However, the algorithm forces an annotation, so even if cell type 1 composes 99% of the spot and cell type 2 only a pathetic 0.5%, the spot will still receive a cell type 2 annotation, but we can ignore it.

We can also extract this data and plot it.

## get data
plotdata <- rctd.obj@spatialRNA@coords %>%
  as.data.frame() %>%
  mutate(type = rctd.obj@results$results_df$first_type) %>%
  filter(type %in% c('B follicular','CD4+ T naive','Monocyte'))

## plot
plotdata %>%
  ggplot(mapping = aes(x=x, y=y, colour=type)) +
  geom_point() + scale_colour_manual(values = c('royalblue','gold2','red3')) +
  theme_classic() + dimtheme -> celltypes_plot
print(celltypes_plot)

Finally, RCTD generates a non-discrete output, a matrix of dimensions spots x cell types in which each value reflects the relative contribution of each cell type to each spot. First we’ll prepare the data by normalising each column 0 to 1 for ease of use, then we’ll add x and y coordinates, then we’ll plot it.

## custom function
normalize <- function(x){
  x <- (x-min(x))/(max(x)-min(x))
  return(x)
}

## get data
out <- as.data.frame(as.matrix(rctd.obj@results$weights))

## normalize column-wise
out <- 
  apply(X = out, MARGIN = 2, FUN = normalize) %>%
  as.data.frame()
  
## get coords
coords <- as.data.frame(rctd.obj@spatialRNA@coords)

## filter to spot barcodes in the RCTD output
coords <- coords[rownames(out),]
  
## centre coords for rotation
coords$x <- coords$x - mean(coords$x)
coords$y <- coords$y - mean(coords$y)
  
## convert pixels to microns
coords[,c('x','y')] <- coords[,c('x','y')] * 0.665
  
## add coord data to output for plotting
out <- out %>% 
  mutate(x = coords$x, y = coords$y) %>%
  relocate(y) %>% relocate(x)

Having prepared the data, let’s take a look at the first few rows so you can see what we did.

head(out, n=5) ## top 5 rows
##                        x          y          ARC        B ASC         B B1
## TCAAGCAGCTTGGA -1489.417 -150.23245 0.0002908978 1.119616e-04 5.241448e-05
## TTGCAAGCCGCTTG -1208.600  -52.14495 0.0002036271 7.837236e-05 3.668989e-05
## AAATGCCCCTCATA -1043.348  -26.27645 0.0236672344 7.837236e-05 3.488157e-03
## ATCAGTTTCTGGTT -1472.379 -148.50345 0.0002908978 4.957669e-03 9.939734e-05
## TCCTTCCTCCCCGT -1301.767  -84.53045 0.0276027802 3.104813e-03 1.258409e-05
##                B follicular         B MZ    CD4+ iNKT CD4+ T naive    CD4+ Treg
## TCAAGCAGCTTGGA   0.45993726 9.833032e-02 0.0084542348 1.476158e-02 3.463330e-02
## TTGCAAGCCGCTTG   0.10002524 3.692036e-05 0.0002116315 3.250962e-01 3.543304e-01
## AAATGCCCCTCATA   0.56426319 4.761207e-02 0.0415301670 3.971266e-05 4.038248e-02
## ATCAGTTTCTGGTT   0.44938915 2.115833e-01 0.0046998011 1.110036e-04 6.801656e-05
## TCCTTCCTCCCCGT   0.02685035 1.266314e-05 0.0601899219 2.481209e-01 8.330190e-02
##                CD8+ T naive         cDC1         cDC2 Erythrocyte          FDC
## TCAAGCAGCTTGGA 5.617389e-05 5.563653e-02 0.0099078938 0.011072507 0.1866914577
## TTGCAAGCCGCTTG 2.027853e-01 7.010265e-02 0.0066280904 0.005018374 0.0002261041
## AAATGCCCCTCATA 3.932146e-05 5.346482e-05 0.0372848523 0.009653404 0.0600874896
## ATCAGTTTCTGGTT 2.329097e-02 7.637937e-05 0.0000577419 0.009371928 0.0371976163
## TCCTTCCTCCCCGT 4.507050e-01 1.567693e-02 0.0384560004 0.007839930 0.0125016407
##                        FRC  Granulocyte  Macrophage     Monocyte      NK cell
## TCAAGCAGCTTGGA 0.011376853 1.524338e-04 0.029992029 1.137459e-02 1.080788e-04
## TTGCAAGCCGCTTG 0.038947487 1.067030e-04 0.007647286 6.644001e-05 7.565464e-05
## AAATGCCCCTCATA 0.001273277 1.584358e-03 0.006451366 2.928688e-03 1.671924e-03
## ATCAGTTTCTGGTT 0.084918932 2.613688e-03 0.013258990 9.707855e-03 1.080788e-04
## TCCTTCCTCCCCGT 0.025112664 3.659754e-05 0.041440889 7.923031e-03 2.594842e-05
##                         pDC        RP FB
## TCAAGCAGCTTGGA 5.534228e-03 0.0130733383
## TTGCAAGCCGCTTG 6.520544e-05 0.0001341761
## AAATGCCCCTCATA 8.977528e-03 0.0390267540
## ATCAGTTTCTGGTT 9.315126e-05 0.0138158883
## TCCTTCCTCCCCGT 2.236450e-05 0.0000460204

And now we can visualise the data.

## plot
ggplot() +
        geom_point(
            data = out, mapping = aes(x=x,y=y,alpha=`B follicular`),
            color = "cyan", size = 0.5) +
        geom_point(
            data = out, mapping = aes(x=x,y=y,alpha=`CD4+ T naive`),
            color = "gold", size = 0.5) +
        geom_point(
            data = out, mapping = aes(x=x,y=y,alpha=`Monocyte`),
            color = "red", size = 0.5) +
        scale_alpha_continuous(range = c(0,1), breaks=c(0,0.1,1)) +
        theme_minimal() +
        theme(
            panel.grid = element_blank(),
            legend.position = "none",
            plot.background = element_rect(fill = "black"),
            panel.background = element_rect(fill = "black"),
            axis.line = element_blank(),
            text = element_blank(),
            plot.title = element_text(size = 30),
            plot.title.position = 'panel',
            axis.title = element_blank()) +
        coord_fixed() ->
  deconv_plot

print(deconv_plot)

You can plot any cell types from this list:

out %>% select(-x, -y) %>% colnames()
##  [1] "ARC"          "B ASC"        "B B1"         "B follicular" "B MZ"        
##  [6] "CD4+ iNKT"    "CD4+ T naive" "CD4+ Treg"    "CD8+ T naive" "cDC1"        
## [11] "cDC2"         "Erythrocyte"  "FDC"          "FRC"          "Granulocyte" 
## [16] "Macrophage"   "Monocyte"     "NK cell"      "pDC"          "RP FB"

Now let’s run a colocalisation assessment. The data is already in the correct format so we’ll import some code that allows us to test cell type-cell type correlation in space.

source('/data/gpfs/projects/punim1445/ST_workshop/colocalisation_function_source.R')

## view the code
SpCor
## function (query, var1, var2, x = "x", y = "y", radius = 50, nbins = 2000, 
##     verbose = F) 
## {
##     if (verbose == T) {
##         message("Binning spatial data")
##     }
##     pos = dbscan::frNN(query[, c(x, y)], eps = radius)
##     query = as.data.frame(query[, c(var1, var2)])
##     query$var1_mean = NA
##     query$var2_mean = NA
##     bins = sample(x = 1:nrow(query), size = nbins)
##     for (i in bins) {
##         query$var1_mean[i] = mean(query[, var1][pos$id[[i]]])
##         query$var2_mean[i] = mean(query[, var2][pos$id[[i]]])
##     }
##     query = query %>% filter(!is.na(var1_mean))
##     query = query[, c("var1_mean", "var2_mean")]
##     colnames(query) = c(var1, var2)
##     cor.res = cor.test(x = query[, var1], y = query[, var2], 
##         method = "spearman")
##     message("Correlation: ", cor.res$estimate)
##     results <- list()
##     results[["cor"]] = cor.res$estimate
##     results[["pval"]] = cor.res$p.value
##     results[["misc"]] = cor.res
##     results[["Cell type 1"]] = var1
##     results[["Cell type 2"]] = var2
##     return(results)
## }

Now we can assess correlation of all cell types. It requires a fair bit of code including some loops, but I’ve already written it for you. Let’s test it on a subset of the data so it doesn’t take too long.

coloc_analysis
## function (input_data, x = "x", y = "y", nbins = 2000, verbose = F, 
##     radius = 50) 
## {
##     celltypes <- colnames(input_data %>% select(-x, -y))
##     n <- length(celltypes)
##     celltype1 <- rep(celltypes, each = n)
##     celltype2 <- rep(celltypes, times = n)
##     heatmap_data <- data.frame(celltype1, celltype2)
##     heatmap_data$Correlation <- 0
##     heatmap_data$bead_correlation <- 0
##     heatmap_data$bead_cor_pearson <- 0
##     heatmap_data$pval <- NA
##     heatmap_data$bead_pval <- NA
##     for (i in 1:nrow(heatmap_data)) {
##         if (heatmap_data[i, 1] == heatmap_data[i, 2]) {
##             heatmap_data$Correlation[i] = NA
##             heatmap_data$bead_correlation[i] = NA
##         }
##         else {
##             celltype_1 <- heatmap_data[i, 1]
##             celltype_2 <- heatmap_data[i, 2]
##             rownumber <- which(heatmap_data[i, 1] == celltype2 & 
##                 heatmap_data[i, 2] == celltype1)
##             if (heatmap_data[rownumber, "Correlation"] != 0) {
##                 heatmap_data[i, 3:7] <- heatmap_data[rownumber, 
##                   3:7]
##             }
##             else {
##                 message(as.character(heatmap_data[i, 1]), " vs ", 
##                   as.character(heatmap_data[i, 2]))
##                 cor_results = SpCor(query = input_data, x = x, 
##                   y = y, nbins = nbins, var1 = as.character(heatmap_data[i, 
##                     1]), var2 = as.character(heatmap_data[i, 
##                     2]), radius = radius)
##                 heatmap_data$Correlation[i] = as.numeric(cor_results$cor)
##                 heatmap_data$pval[i] = as.numeric(cor_results$pval)
##             }
##         }
##     }
##     colnames(heatmap_data)[3] <- "ρ"
##     hdata <- dcast(heatmap_data, formula = celltype1 ~ celltype2, 
##         fun.aggregate = sum, value.var = "ρ")
##     rownames(hdata) <- hdata$celltype1
##     hdata <- hdata %>% select(-celltype1)
##     hdata <- hclust(dist(hdata[, 1:ncol(hdata)]))
##     heatmap_data$celltype1 <- as.factor(heatmap_data$celltype1)
##     heatmap_data$celltype2 <- as.factor(heatmap_data$celltype2)
##     heatmap_data$celltype1 <- factor(x = heatmap_data$celltype1, 
##         ordered = T, levels = levels(heatmap_data$celltype1)[hdata$order])
##     heatmap_data$celltype2 <- factor(x = heatmap_data$celltype2, 
##         ordered = T, levels = levels(heatmap_data$celltype2)[hdata$order])
##     cor_plot <- ggplot(data = heatmap_data, aes(x = celltype1, 
##         y = celltype2, fill = ρ)) + geom_tile() + scale_fill_gradient2(low = "royalblue3", 
##         mid = "white", high = "red3", midpoint = 0, na.value = "grey80", 
##         limits = c(-1, 1)) + scale_y_discrete(limits = rev(levels(heatmap_data$celltype2))) + 
##         xlab(NULL) + ylab(NULL) + theme_minimal() + scale_x_discrete(position = "top") + 
##         theme(axis.text.x = element_text(hjust = 0, angle = 45), 
##             legend.position = "right", legend.direction = "vertical", 
##             axis.line = element_line(colour = "black")) + coord_fixed()
##     output <- list(cor_plot, heatmap_data)
##     return(output)
## }
## run it
results <- coloc_analysis(input_data = out[,1:10], radius=50)
## ARC vs B ASC
## Correlation: 0.307123768780942
## ARC vs B B1
## Correlation: -0.112875392718848
## ARC vs B follicular
## Correlation: -0.344787575196894
## ARC vs B MZ
## Correlation: -0.118002410500603
## ARC vs CD4+ iNKT
## Correlation: 0.16055963963991
## ARC vs CD4+ T naive
## Correlation: 0.0988102717025679
## ARC vs CD4+ Treg
## Correlation: 0.0299941694985424
## B ASC vs B B1
## Correlation: -0.184564000141
## B ASC vs B follicular
## Correlation: -0.385386091846523
## B ASC vs B MZ
## Correlation: -0.222390922597731
## B ASC vs CD4+ iNKT
## Correlation: 0.172560292140073
## B ASC vs CD4+ T naive
## Correlation: 0.118045252011313
## B ASC vs CD4+ Treg
## Correlation: 0.0601151010287753
## B B1 vs B follicular
## Correlation: 0.407072869268217
## B B1 vs B MZ
## Correlation: 0.390881302720326
## B B1 vs CD4+ iNKT
## Correlation: -0.255041535760384
## B B1 vs CD4+ T naive
## Correlation: -0.172817524204381
## B B1 vs CD4+ Treg
## Correlation: -0.122185187046297
## B follicular vs B MZ
## Correlation: 0.412039783009946
## B follicular vs CD4+ iNKT
## Correlation: -0.478726090181523
## B follicular vs CD4+ T naive
## Correlation: -0.509311492327873
## B follicular vs CD4+ Treg
## Correlation: -0.287212166803042
## B MZ vs CD4+ iNKT
## Correlation: -0.464759691689923
## B MZ vs CD4+ T naive
## Correlation: -0.550228049557012
## B MZ vs CD4+ Treg
## Correlation: -0.498910842227711
## CD4+ iNKT vs CD4+ T naive
## Correlation: 0.597278486319622
## CD4+ iNKT vs CD4+ Treg
## Correlation: 0.507495890373973
## CD4+ T naive vs CD4+ Treg
## Correlation: 0.614520378630095
## get colocalisation table
print(results[[1]])

## get numeric results
print(results[[2]])
##       celltype1    celltype2           ρ bead_correlation bead_cor_pearson
## 1           ARC          ARC          NA               NA                0
## 2           ARC        B ASC  0.30712377                0                0
## 3           ARC         B B1 -0.11287539                0                0
## 4           ARC B follicular -0.34478758                0                0
## 5           ARC         B MZ -0.11800241                0                0
## 6           ARC    CD4+ iNKT  0.16055964                0                0
## 7           ARC CD4+ T naive  0.09881027                0                0
## 8           ARC    CD4+ Treg  0.02999417                0                0
## 9         B ASC          ARC  0.30712377                0                0
## 10        B ASC        B ASC          NA               NA                0
## 11        B ASC         B B1 -0.18456400                0                0
## 12        B ASC B follicular -0.38538609                0                0
## 13        B ASC         B MZ -0.22239092                0                0
## 14        B ASC    CD4+ iNKT  0.17256029                0                0
## 15        B ASC CD4+ T naive  0.11804525                0                0
## 16        B ASC    CD4+ Treg  0.06011510                0                0
## 17         B B1          ARC -0.11287539                0                0
## 18         B B1        B ASC -0.18456400                0                0
## 19         B B1         B B1          NA               NA                0
## 20         B B1 B follicular  0.40707287                0                0
## 21         B B1         B MZ  0.39088130                0                0
## 22         B B1    CD4+ iNKT -0.25504154                0                0
## 23         B B1 CD4+ T naive -0.17281752                0                0
## 24         B B1    CD4+ Treg -0.12218519                0                0
## 25 B follicular          ARC -0.34478758                0                0
## 26 B follicular        B ASC -0.38538609                0                0
## 27 B follicular         B B1  0.40707287                0                0
## 28 B follicular B follicular          NA               NA                0
## 29 B follicular         B MZ  0.41203978                0                0
## 30 B follicular    CD4+ iNKT -0.47872609                0                0
## 31 B follicular CD4+ T naive -0.50931149                0                0
## 32 B follicular    CD4+ Treg -0.28721217                0                0
## 33         B MZ          ARC -0.11800241                0                0
## 34         B MZ        B ASC -0.22239092                0                0
## 35         B MZ         B B1  0.39088130                0                0
## 36         B MZ B follicular  0.41203978                0                0
## 37         B MZ         B MZ          NA               NA                0
## 38         B MZ    CD4+ iNKT -0.46475969                0                0
## 39         B MZ CD4+ T naive -0.55022805                0                0
## 40         B MZ    CD4+ Treg -0.49891084                0                0
## 41    CD4+ iNKT          ARC  0.16055964                0                0
## 42    CD4+ iNKT        B ASC  0.17256029                0                0
## 43    CD4+ iNKT         B B1 -0.25504154                0                0
## 44    CD4+ iNKT B follicular -0.47872609                0                0
## 45    CD4+ iNKT         B MZ -0.46475969                0                0
## 46    CD4+ iNKT    CD4+ iNKT          NA               NA                0
## 47    CD4+ iNKT CD4+ T naive  0.59727849                0                0
## 48    CD4+ iNKT    CD4+ Treg  0.50749589                0                0
## 49 CD4+ T naive          ARC  0.09881027                0                0
## 50 CD4+ T naive        B ASC  0.11804525                0                0
## 51 CD4+ T naive         B B1 -0.17281752                0                0
## 52 CD4+ T naive B follicular -0.50931149                0                0
## 53 CD4+ T naive         B MZ -0.55022805                0                0
## 54 CD4+ T naive    CD4+ iNKT  0.59727849                0                0
## 55 CD4+ T naive CD4+ T naive          NA               NA                0
## 56 CD4+ T naive    CD4+ Treg  0.61452038                0                0
## 57    CD4+ Treg          ARC  0.02999417                0                0
## 58    CD4+ Treg        B ASC  0.06011510                0                0
## 59    CD4+ Treg         B B1 -0.12218519                0                0
## 60    CD4+ Treg B follicular -0.28721217                0                0
## 61    CD4+ Treg         B MZ -0.49891084                0                0
## 62    CD4+ Treg    CD4+ iNKT  0.50749589                0                0
## 63    CD4+ Treg CD4+ T naive  0.61452038                0                0
## 64    CD4+ Treg    CD4+ Treg          NA               NA                0
##             pval bead_pval
## 1             NA        NA
## 2   5.992622e-45        NA
## 3   4.171367e-07        NA
## 4   6.356499e-57        NA
## 5   1.207024e-07        NA
## 6   5.090412e-13        NA
## 7   9.555088e-06        NA
## 8   1.799714e-01        NA
## 9   5.992622e-45        NA
## 10            NA        NA
## 11  8.768996e-17        NA
## 12  8.081458e-72        NA
## 13  7.831765e-24        NA
## 14  7.807442e-15        NA
## 15  1.194314e-07        NA
## 16  7.162767e-03        NA
## 17  4.171367e-07        NA
## 18  8.768996e-17        NA
## 19            NA        NA
## 20  1.089993e-80        NA
## 21  5.276609e-74        NA
## 22  4.584351e-31        NA
## 23  7.114625e-15        NA
## 24  4.219243e-08        NA
## 25  6.356499e-57        NA
## 26  8.081458e-72        NA
## 27  1.089993e-80        NA
## 28            NA        NA
## 29  8.152161e-83        NA
## 30 4.340543e-115        NA
## 31 1.816216e-132        NA
## 32  2.746668e-39        NA
## 33  1.207024e-07        NA
## 34  7.831765e-24        NA
## 35  5.276609e-74        NA
## 36  8.152161e-83        NA
## 37            NA        NA
## 38 1.010304e-107        NA
## 39 1.135316e-158        NA
## 40 2.333610e-126        NA
## 41  5.090412e-13        NA
## 42  7.807442e-15        NA
## 43  4.584351e-31        NA
## 44 4.340543e-115        NA
## 45 1.010304e-107        NA
## 46            NA        NA
## 47 1.127477e-193        NA
## 48 2.192105e-131        NA
## 49  9.555088e-06        NA
## 50  1.194314e-07        NA
## 51  7.114625e-15        NA
## 52 1.816216e-132        NA
## 53 1.135316e-158        NA
## 54 1.127477e-193        NA
## 55            NA        NA
## 56 5.171356e-208        NA
## 57  1.799714e-01        NA
## 58  7.162767e-03        NA
## 59  4.219243e-08        NA
## 60  2.746668e-39        NA
## 61 2.333610e-126        NA
## 62 2.192105e-131        NA
## 63 5.171356e-208        NA
## 64            NA        NA

That’s all I will cover for deconvolution. Lastly, we’ll discuss casting.

We can discuss how to perform casting at a later date but for now I’m going to show how to perform some of the downstream analysis.

Let’s load some coordinates generated with TACCO, a deconvolution algorithm, then add them to the day 0 reference dataset.

coords <- read.csv('/data/gpfs/projects/punim1445/ST_workshop/day0_puck4_cells_mapped.csv', row.names = 1) %>%
  mutate(x = x * 0.665) %>% mutate(y = y * 0.665) %>% ## convert units because I forgot when I ran this. different scaling factor for Slide-seq
  set_names(nm = c('SPATIAL_1','SPATIAL_2'))

ref <- readRDS("/data/gpfs/projects/punim1445/ST_workshop/day0_reference_scrnaseq.rds")
ref[['SPATIAL']] <- CreateDimReducObject(embeddings = as.matrix(coords))
## Warning: No assay specified, setting assay as RNA by default.

Next, we can plot some cell types.

celltypes <- c('B follicular','CD4+ T naive','Monocyte')

ref %>% subset(subset = cell_type %in% celltypes) %>%
  DimPlot(reduction = 'SPATIAL', group.by = 'cell_type', cols = rev(wes_palettes$Darjeeling1)) + 
  dimtheme

We can also test the distance from each spot to a given features. Let’s use the distance to a monocyte as an example.

First we need to isolate the monocyte coordinates as a landmark dataset.

ref %>% subset(cell_type == 'Monocyte') %>% Embeddings('SPATIAL') ->
  landmark

head(landmark)
##                         SPATIAL_1 SPATIAL_2
## D0_AAAGTCCAGCTGTTAC-1_2  867.7585  1272.079
## D0_AACCACACACTTCAGA-1_2  710.9515  2707.747
## D0_AACGAAAAGATACGAT-1_2 2160.6515  1693.822
## D0_AACTTCTGTGCCCACA-1_2 1091.9300  1456.350
## D0_AAGATAGGTATTGAGA-1_2 3068.9750  2438.954
## D0_AATCGACTCTTAAGGC-1_2 2643.7075  1364.248

Then we need to get the embeddings for the rest of the data.

ref %>% Embeddings('SPATIAL') ->
  target

Then we can use dbscan to get the distance from each point in target (all cells) to the nearest point in landmark (monocytes).

dbout <- kNN(x = landmark, query = target, k=5, sort=T)

ref %>% mutate(dist = dbout$dist[,1]) %>%
  FeaturePlot(reduction = 'SPATIAL', features = 'dist', order=T) +
  sptheme
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We can also discretise the data.

dbout <- kNN(x = landmark, query = target, k=5, sort=T)

ref %>% mutate(dist = dbout$dist[,1]) %>% mutate(dist_TF = dist < 50) %>%
  DimPlot(reduction = 'SPATIAL', group.by = 'dist_TF', order=T, cols = c('grey85','red3')) +
  dimtheme

Finally, we can test this in CD4+ T cells specifically.

ref %>% subset(cell_type == 'CD4+ T naive') %>% Embeddings('SPATIAL') ->
  target

dbout <- kNN(x = landmark, query = target, k=5, sort=T)

ref %>% subset(cell_type == 'CD4+ T naive') %>% mutate(dist = dbout$dist[,1]) %>% mutate(dist_TF = dist < 50) %>%
  DimPlot(reduction = 'SPATIAL', group.by = 'dist_TF', order=T, cols = c('grey85','red3')) +
  dimtheme

Finally, let’s export some data so we can switch over to python later.

sc <- import('scanpy')

## get metadata
obs <- subslice@meta.data

## get spatial data
spatial <- subslice[['SPATIAL']]@cell.embeddings %>% as.data.frame() %>%
  setnames(nm = c('x','y'))

## Retrieve and transpose raw counts matrix
## it should be oriented cells x genes
subslice <- t(as.matrix(GetAssayData(subslice,slot='counts')))

## Retain cell and gene names because they are not automatically added to the AnnDataset object
cell_names <- rownames(subslice)
gene_names <- colnames(subslice)

adata <- sc$AnnData(
  X   = subslice,
  obs = obs,
  obsm = spatial,
  var = data.frame(var_genes)
)
print(adata)

## add names
adata$var_names <- gene_names
adata$obs_names <- cell_names

## remove unneeded data and then garbage collect to free up some memory
rm(obs,subslice)
gc()

## Sometimes this step can take a while, sometimes not
adata$write("/data/projects/punim1445/ST_workshop/YOUR_SUBDIRECTORY/subslice.h5ad")